import jieba

# Tokenize Chinese text with jieba's exact mode and count word frequencies.
if __name__ == '__main__':
    # Context manager guarantees the file handle is closed even on error
    # (the original opened the file and never closed it).
    with open('data3.txt', 'r', encoding='utf-8') as fi:
        text = fi.read()
    words = jieba.lcut(text)
    # Count only words of length >= 2 to skip single characters/punctuation.
    counts = {}
    for w in words:
        if len(w) >= 2:
            counts[w] = counts.get(w, 0) + 1
    # Rank by frequency, highest first; Python's sort is stable, so words
    # with equal counts keep their insertion order (same as the original).
    ranked = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)
    # Print the 10 most frequent qualifying words as "word:count".
    for word, freq in ranked[:10]:
        print("{}:{}".format(word, freq))